## descriptive stats
library(ggplot2)
library(dplyr)
# Figure 1: Aid data

# Read the source data used by the first two figures.
westedu5 <- read.csv("westedu_chaid_oct24.csv")

# Keep the four columns listed below and drop every row that has a missing
# value in any of them (Country is part of that check even though it is not
# used in the aggregation that follows).
data_plot <- na.omit(
  dplyr::select(westedu5, year, Country, region2, log.total.volume2)
)

# Mean of log.total.volume2 within each year x region2 cell.
# Note: the grouping is by region2, not by Country.
data_grouped <- summarise(
  group_by(data_plot, year, region2),
  log_total_volume2 = mean(log.total.volume2)
)
# Plain data.frame copy of the aggregated table.
t <- as.data.frame(data_grouped)

# Per-year mean of the region2-level means computed above, i.e. an unweighted
# average over the region2 groups present in each year.
yearly_mean <- summarise(
  group_by(data_grouped, year),
  mean_log_total_volume2 = mean(log_total_volume2, na.rm = TRUE)
)

# Figure 1, variant A: one grey line per region2 group with the yearly mean
# of those groups drawn on top in black.
g.1 <- ggplot(
  data = data_grouped,
  mapping = aes(x = as.integer(year), y = log_total_volume2)
) +
  # One line per region2 group
  geom_line(mapping = aes(group = region2), alpha = 0.7, color = "grey") +
  # Yearly mean line, taken from its own data frame
  geom_line(
    mapping = aes(x = as.integer(year), y = mean_log_total_volume2),
    data = yearly_mean,
    size = 1.2,
    color = "black"
  ) +
  labs(
    y = "Logged Chinese Official Finance",
    x = "Year",
    title = ""
  ) +
  theme_bw()

g.1

# Figure 1, variant B: only the yearly mean line, with a thinner stroke and a
# y-axis label that states the currency basis. This is the version saved.
g.1.2 <- ggplot(
  data = data_grouped,
  mapping = aes(x = as.integer(year), y = log_total_volume2)
) +
  geom_line(
    mapping = aes(x = as.integer(year), y = mean_log_total_volume2),
    data = yearly_mean,
    size = 0.6,
    color = "black"
  ) +
  labs(
    y = "Logged Chinese Official Finance (constant 2014 USD)",
    x = "Year",
    title = ""
  ) +
  theme_bw()

g.1.2

ggsave(
  filename = "Figure X_Chinese OF over Years.jpeg",
  plot = g.1.2,
  height = 5,
  width = 6
)

  
# Figure X: Westedu or not vs Chinese OF
#
# Sums total.volume1 per year and per value of westedu2, then draws one line
# per westedu2 value, distinguished by line type.

# Yearly totals by westedu2.
# NOTE(review): sum() is called without na.rm, so a single NA in
# total.volume1 makes that year/group total NA -- confirm this is intended.
westedu6 <- aggregate(
  westedu5$total.volume1,
  by = list(westedu5$year, westedu5$westedu2),
  FUN = sum
)

# Give the aggregate() output columns readable names.
westedu6 <- rename(
  westedu6,
  year = Group.1,
  Western_education = Group.2,
  total.amount = x
)

westedu6$Western_education <- as.factor(westedu6$Western_education)

g <- ggplot(
  westedu6,
  aes(x = year, y = total.amount, group = Western_education)
) +
  geom_line(aes(linetype = Western_education), color = "black", size = 1) +
  # A plot can hold only one linetype scale: adding a second one replaces the
  # first. Values and labels are therefore set together in a single scale so
  # that the first level is dashed ("Without") and the second is solid
  # ("With").
  scale_linetype_manual(
    values = c("dashed", "solid"),
    labels = c("0 = Without", "1 = With")
  ) +
  labs(
    x = "Year",
    y = "Total Amount of Chinese OF (Constant 2014 USD)",
    linetype = "Western Education"
  ) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    legend.title = element_text(face = "bold"),
    legend.text = element_text(size = 9),
    axis.title = element_text(size = 11),
    axis.text = element_text(size = 9)
  )

g

ggsave("Figure_Edu vs Chinese OF.jpeg", g, height = 5, width = 8)


# Figure: Edu Destination

# ggplot2 is attached again here (it is also loaded at the top of the script).
library(ggplot2)

# Read the destination records; only the `destination` column is used below.
df1 <- read.csv("westedu_destination.csv")

# Tally rows per destination and order the table from most to least frequent.
destination_counts <- arrange(count(df1, destination), desc(n))

# Freeze that order as the factor levels so ggplot keeps it instead of
# falling back to alphabetical order.
destination_counts$destination <- factor(
  destination_counts$destination,
  levels = destination_counts$destination
)

# Horizontal bar chart of the tallies, with the count printed next to each bar.
f1 <- ggplot(destination_counts, aes(x = destination, y = n)) +
  geom_col(fill = "black") +
  geom_text(aes(label = n), size = 3, hjust = -0.35, vjust = 0.3) +
  labs(
    title = " ",
    x = "Destination",
    y = "Frequency"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
    axis.title = element_text(size = 14, face = "bold"),
    axis.text.x = element_text(size = 10, hjust = 1, face = "bold"),
    axis.text.y = element_text(size = 10, face = "bold"),
    panel.border = element_rect(color = "black", size = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  coord_flip()

f1
ggsave(
  filename = "Figure_Edu Destination.jpeg",
  plot = f1,
  height = 4,
  width = 6
)

# Figure: Edu Type

# Both packages are attached again here (they are also loaded at the top of
# the script).
library(dplyr)
library(ggplot2)

df <- read.csv("westedu_typeedu.csv")

# Lookup from the raw values of `degree` (names) to the labels shown in the
# figure (values). The order of the entries is also the display order used
# for the factor levels below.
degree_mapping <- c(
  "Master and PhD" = "PhD + Master",
  "Phd" = "PhD",
  "Master" = "Master",
  "College and postgraduate" = "Postgraduate + College",
  "College" = "College",
  "High school and college" = "College + High School",
  "High school" = "High School"
)

# Apply the lookup; the result is stored in a new column.
df$degree_relabel <- recode(df$degree, !!!degree_mapping)

# Tally rows per relabelled degree, then fix the level order to the order in
# which the labels appear in degree_mapping.
degree_counts <- count(df, degree_relabel)
degree_counts$degree_relabel <- factor(
  degree_counts$degree_relabel,
  levels = unname(degree_mapping)
)

# Horizontal bar chart of the tallies, with the count printed next to each bar.
f2 <- ggplot(degree_counts, aes(x = degree_relabel, y = n)) +
  geom_col(fill = "black") +
  geom_text(aes(label = n), size = 3, hjust = -0.4, vjust = 0.7) +
  labs(
    title = "",
    x = "Degree",
    y = "Frequency"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    axis.title = element_text(size = 9),
    axis.text.x = element_text(hjust = 1)
  ) +
  coord_flip()
f2

ggsave(
  filename = "Figure_Edu Type.jpeg",
  plot = f2,
  height = 3,
  width = 6
)

# Figure: Distribution of Leaders' Edu in Regions
#
# Dodged bar chart of row counts per region2, split by Western_education,
# with regions ordered by their number of Western_education == 0 rows.

data <- read.csv("westedu_region_percentage.csv")

# Regions sorted by how many rows have Western_education == 0 (descending).
region_order <- data %>%
  filter(Western_education == 0) %>%
  count(region2) %>%
  arrange(desc(n)) %>%
  pull(region2)

# A region with no Western_education == 0 rows is absent from region_order.
# Using region_order alone as the levels would turn such a region into NA,
# so any remaining regions are appended after the sorted ones.
data$region2 <- factor(
  data$region2,
  levels = union(region_order, unique(data$region2))
)


# Create the plot
f4 <- ggplot(data, aes(x = region2, fill = factor(Western_education))) +
  # No y aesthetic: geom_bar() counts the rows in each region2 / fill group.
  geom_bar(position = "dodge", color = "black") +
  # Print each bar's count beside it. after_stat(count) is the current
  # spelling of the deprecated `..count..` notation.
  geom_text(stat = "count",
            aes(label = after_stat(count)),
            position = position_dodge(width = 0.9),
            vjust = 0.5,
            hjust = -0.2,
            size = 3) +
  scale_fill_manual(values = c("0" = "black", "1" = "lightgray"),
                    labels = c("Without", "With")) +
  labs(x = "Region",
       y = "Number of Leaders",
       fill = "Western Education",
       title = "") +
  theme_bw(base_size = 14) +
  theme(legend.position = "bottom",
        legend.title = element_text(size = 10),
        legend.text = element_text(size = 9),
        axis.title.x = element_text(size = 9),
        axis.title.y = element_text(size = 9),
        axis.text = element_text(size = 10),
        plot.title = element_text(size = 14, face = "bold"),
        panel.grid.major.y = element_line(color = "gray", size = 0.2),
        panel.grid.major.x = element_blank(),
        panel.grid.minor = element_blank()) +
  coord_flip()
f4
ggsave("Figure_leaders in region_updated.jpeg", f4, width = 9, height = 5)



